Mapping genetic effects of diverse mESC lines on cellular phenotypes with Census-seq

Author

Maddie Rea

Published

August 2, 2024

Maddie’s Presentation

Welcome to my presentation! Today I’m going to showcase what I’ve been doing over the summer. This is the culmination of most of my work, I welcome and encourage feedback before my poster presentation!

Code
library(ggplot2)
library(MASS) 
library(reshape2) 
library(reshape) 
library(patchwork)
library(ggpubr)
library(plotly)
library(dplyr)
library(readr)
library(crayon)
library(pheatmap)
Code
compare_vcfs = function(quilt_input,geno_input,census_input,x_order,base=FALSE){
  
  census_input$V1 = sub("plexWell-", "", census_input$V1)
  census_input$V1 = sub("_GT23-023.._.*", "", census_input$V1)
  quilt_input$DONOR = sub("plexWell-", "", quilt_input$DONOR)
  quilt_input$DONOR = sub("_GT23-023.._.*", "", quilt_input$DONOR)
  geno_input$DONOR = sub("plexWell-", "", geno_input$DONOR)
  geno_input$DONOR = sub("_GT23-023.._.*", "", geno_input$DONOR)
  
  
  census_input = census_input[order(census_input$V1),]
  quilt_input$READS = census_input$V2
  meltedbar = subset(quilt_input, select = c(DONOR,READS,COUNT))
  colnames(meltedbar) <- c('DONOR','Failed','Passed')
  meltedbar = melt(meltedbar,id="DONOR")
  colnames(meltedbar) <- c('Donor','Filtered','Reads')
  
  p1 <- ggplot(data=meltedbar, aes(x=Donor,y=Reads, fill=Filtered)) +
  geom_bar(position="identity",stat="identity") +
  scale_fill_manual(values=c("#D41159", "#69C561")) +
  scale_x_discrete(limits = x_order) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
    panel.background = element_rect(fill='transparent'), 
    plot.background = element_rect(fill='transparent', color=NA), 
    panel.grid.major = element_blank(), 
    panel.grid.minor = element_blank(), 
    legend.background = element_rect(fill='transparent'), 
    legend.box.background = element_rect(fill='transparent') 
  ) + 
  labs(y = "Total Reads") +
  guides(fill=guide_legend(title="Reads QC"))
  
  if (base == FALSE){
    p1 <- p1 + theme(axis.text.x=element_blank(),
                     axis.ticks.x=element_blank(),
                     axis.title.x =element_blank()) 
  }
    
  
  quilt_input$G_REP = geno_input$REPRESENTATION
  quilt_input$Genoprobs = quilt_input$G_REP - quilt_input$KNOWN
  quilt_input$Quilt = quilt_input$REPRESENTATION - quilt_input$KNOWN
  meltedline = subset(quilt_input, select = c(DONOR,Quilt,Genoprobs))
  meltedline = melt(meltedline,id="DONOR")
  colnames(meltedline) <- c('Donor','VCF','Differences')
  p2 <- ggplot(data = meltedline) +
  geom_hline(yintercept=0,size=0.5)+
  geom_line(aes(x = Donor, y = Differences, colour = VCF, group = VCF)) +
  geom_point(aes(x = Donor, y = Differences, colour = VCF, group = VCF))+
  scale_x_discrete(limits = x_order) +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank()) +
  scale_color_manual(values=c('#063970', '#A69943')) +
  theme(
    panel.background = element_rect(fill='transparent'), 
    plot.background = element_rect(fill='transparent', color=NA), 
    panel.grid.major = element_blank(), 
    panel.grid.minor = element_blank(), 
    legend.background = element_rect(fill='transparent'), 
    legend.box.background = element_rect(fill='transparent') 
  ) + labs(y = "∆ Proportion")
  
  quilt_input$Genoprobs = ((quilt_input$G_REP - quilt_input$KNOWN) / quilt_input$KNOWN) * 100
  quilt_input$Quilt = ((quilt_input$REPRESENTATION - quilt_input$KNOWN) / quilt_input$KNOWN) * 100
  meltedline_error = subset(quilt_input, select = c(DONOR,Quilt,Genoprobs))
  meltedline_error = melt(meltedline_error,id="DONOR")
  colnames(meltedline_error) <- c('Donor','VCF','PercentError')
  p3 <- ggplot(data = meltedline_error) +
  geom_hline(yintercept=0,size=0.5)+
  geom_line(aes(x = Donor, y = PercentError, colour = VCF, group = VCF)) +
  geom_point(aes(x = Donor, y = PercentError, colour = VCF, group = VCF))+
  scale_x_discrete(limits = x_order) +
  scale_color_manual(values=c('#063970', '#A69943')) +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank()) +
  theme(
    panel.background = element_rect(fill='transparent'), 
    plot.background = element_rect(fill='transparent', color=NA), 
    panel.grid.major = element_blank(), 
    panel.grid.minor = element_blank(),
    legend.background = element_rect(fill='transparent'), 
    legend.box.background = element_rect(fill='transparent')
  ) + labs(y = "Percent Error")

stacked_plots <- p2 + p3 + p1 + plot_layout(ncol = 1)
stacked_plots
}
Code
log_census = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/sixteen_vcfs_sample/log_census_report.tsv",header=FALSE,sep="\t")

log_census$V1 = sub("plexWell-", "", log_census$V1)
log_census$V1 = sub("_GT23-023.._.*", "", log_census$V1)

hold = log_census[order(log_census$V2),]
hold = hold$V1

Proportion versus Percent Error

Sixteen samples were randomly chosen from the pool of forty-eight and were proportioned in equal, logarithmic, and “random” proportions. Each bar represents the total reads a sample contributed to the pool of three million reads, with magenta reads failing the quality control. Above the difference in estimation versus true proportion for each VCF and their corresponding percent error is graphed.

Equal Proportions

Code
equal_quilt = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/sixteen_vcfs_sample/equal_quilt.census.txt",header=TRUE,sep="\t",skip=2)
equal_geno = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/sixteen_vcfs_sample/equal_geno.census.txt",header=TRUE,sep="\t",skip=2)
equal_census = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/sixteen_vcfs_sample/equal_census_report.tsv",header=FALSE,sep="\t")

compare_vcfs(equal_quilt,equal_geno,equal_census,hold)
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

Code
ggsave("images/equal_proportions.png")
Saving 7 x 5 in image

Logarithmic Proportions

Code
log_quilt = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/sixteen_vcfs_sample/log_quilt.census.txt",header=TRUE,sep="\t",skip=2)
log_geno = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/sixteen_vcfs_sample/log_geno.census.txt",header=TRUE,sep="\t",skip=2)
log_census = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/sixteen_vcfs_sample/log_census_report.tsv",header=FALSE,sep="\t")

compare_vcfs(log_quilt,log_geno,log_census,hold)

Code
ggsave("images/log_proportions.png")
Saving 7 x 5 in image

Random Proportion VCF Comparison

Code
random_quilt = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/sixteen_vcfs_sample/random_quilt.census.txt",header=TRUE,sep="\t",skip=2)
random_geno = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/sixteen_vcfs_sample/random_geno.census.txt",header=TRUE,sep="\t",skip=2)
random_census = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/sixteen_vcfs_sample/random_census_report.tsv",header=FALSE,sep="\t")
compare_vcfs(random_quilt,random_geno,random_census,hold,base=TRUE)

Code
ggsave("images/random_proportions.png")
Saving 7 x 5 in image
Code
# library(reticulate)
# conda_create(envname = "myenv")
# conda_install( envname = "myenv", packages = c("seaborn","pandas","matplotlib","plotly"))
# use_condaenv(condaenv = "myenv",required = TRUE  )

3D Percent Error Graph

Code
sample_amounts <- c()
read_numbers <- c()
percent_errors <- c()
proportion_types <- c()

calculate_pe <- function(df) {
  df$PERCENT_ERROR <- abs(df$REPRESENTATION - df$KNOWN) / df$KNOWN * 100
  av_percent_error <- mean(df$PERCENT_ERROR)
  return(av_percent_error)
}

files <- list.files(path = "/projects/munger-lab/projects/census_seq_SSP2024/genoprobs_runs/", pattern = "my.census.txt", recursive = TRUE, full.names = TRUE)

for (file_path in files) {
  
  path_parts <- strsplit(file_path, "/")[[1]]
  sample_amount <- path_parts[length(path_parts) - 4]
  proportion_type <- path_parts[length(path_parts) - 3]
  read_number <- path_parts[length(path_parts) - 2]
  
  df <- read_tsv(file_path, skip = 2)
  percent_error <- calculate_pe(df)
  sample_amounts <- c(sample_amounts, sample_amount)
  read_numbers <- c(read_numbers, read_number)
  percent_errors <- c(percent_errors, percent_error)
  proportion_types <- c(proportion_types, proportion_type)
}

percent_errors <- as.numeric(percent_errors)

reads_to_numbers <- list(
  'h_thousand' = 100000,
  'o_million' = 1000000,
  't_million' = 10000000,
  'f_million' = 50000000,
  'h_million' = 100000000
)

read_numbers <- sapply(read_numbers, function(x) reads_to_numbers[[x]])

samples_to_numbers <- list(
  's4' = 4,
  's8' = 8,
  's16' = 16,
  's32' = 32,
  's48' = 48
)

sample_amounts <- sapply(sample_amounts, function(x) samples_to_numbers[[x]])

df <- data.frame(
  'Reads' = read_numbers,
  'Samples' = sample_amounts,
  'Percent_Error' = percent_errors,
  'Proportion_Type' = proportion_types
)

fig <- plot_ly(df, x = ~Samples, y = ~Reads, z = ~Percent_Error, color = ~Proportion_Type, colors = c('#009E73','#56B4E9','#F0E442'))
fig <- fig %>% add_markers(marker = list(opacity = 0.5))
fig <- fig %>% layout(scene = list(xaxis = list(title = 'Number of Samples'),
                     yaxis = list(title = 'Number of Reads'),
                     zaxis = list(title = 'Percent Error')))

fig

Random Proportion versus Percent Error Animated

Code
library(dplyr)
library(readr)
library(plotly)

read_numbers <- c()

calculate_pe <- function(df) {
  df$PERCENT_ERROR <- abs(df$REPRESENTATION - df$KNOWN) / df$KNOWN * 100
  av_percent_error <- mean(df$PERCENT_ERROR)
  return(av_percent_error)
}

files <- list.files(path = "/projects/munger-lab/projects/census_seq_SSP2024/genoprobs_runs/s48/random/", pattern = "my.census.txt", recursive = TRUE, full.names = TRUE)

results <- data.frame(Proportion = numeric(), Percent_Error = numeric(), Reads = character())

for (file_path in files) {
  
  path_parts <- strsplit(file_path, "/")[[1]]
  read_number <- path_parts[length(path_parts) - 2]
  run_number <- path_parts[length(path_parts) - 1]
  
  df <- read_tsv(file_path, skip = 2)
  
  df <- df %>%
    mutate(DONOR = paste0(DONOR, "_", run_number),
           Percent_Error = abs(REPRESENTATION - KNOWN) / KNOWN * 100,
           Reads = read_number)
  
  results <- rbind(results, df)
  
}


reads_to_numbers <- list(
  'h_thousand' = 100000,
  'o_million' = 1000000,
  't_million' = 10000000,
  'f_million' = 50000000,
  'h_million' = 100000000
)

results$Reads <- sapply(results$Reads, function(x) reads_to_numbers[[x]])
results$Reads <- as.integer(results$Reads)

results$DONOR <- as.factor(results$DONOR)

fig <- results %>%
  plot_ly(
    x = ~KNOWN,
    y = ~Percent_Error,
    frame = ~Reads,
    text = ~DONOR,
    hoverinfo = "text",
    type = 'scatter',
    mode = 'markers'
  )

fig

Logistic Proportion versus Percent Error Animated

Code
library(dplyr)
library(readr)
library(plotly)
library(gapminder)

read_numbers <- c()

calculate_pe <- function(df) {
  df$PERCENT_ERROR <- abs(df$REPRESENTATION - df$KNOWN) / df$KNOWN * 100
  av_percent_error <- mean(df$PERCENT_ERROR)
  return(av_percent_error)
}

files <- list.files(path = "/projects/munger-lab/projects/census_seq_SSP2024/genoprobs_runs/s48/log/", pattern = "my.census.txt", recursive = TRUE, full.names = TRUE)

results <- data.frame(Proportion = numeric(), Percent_Error = numeric(), Reads = character())

for (file_path in files) {
  
  path_parts <- strsplit(file_path, "/")[[1]]
  read_number <- path_parts[length(path_parts) - 2]
  run_number <- path_parts[length(path_parts) - 1]
  
  df <- read_tsv(file_path, skip = 2)
  
  df <- df %>%
    mutate(DONOR = paste0(DONOR, "_", run_number),
           Percent_Error = abs(REPRESENTATION - KNOWN) / KNOWN * 100,
           Reads = read_number)
  
  results <- rbind(results, df)
  
}


reads_to_numbers <- list(
  'h_thousand' = 100000,
  'o_million' = 1000000,
  't_million' = 10000000,
  'f_million' = 50000000,
  'h_million' = 100000000
)

results$Reads <- sapply(results$Reads, function(x) reads_to_numbers[[x]])
results$Reads <- as.integer(results$Reads)

results$DONOR <- as.factor(results$DONOR)

fig <- results %>%
  plot_ly(
    x = ~KNOWN,
    y = ~Percent_Error,
    frame = ~Reads,
    text = ~DONOR,
    hoverinfo = "text",
    type = 'scatter',
    mode = 'markers'
  )

fig

Different Aligners

Code
bowtie = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/diff_aligners_test/bowtie.census.txt",sep="\t",skip=2,header=TRUE)
mini = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/diff_aligners_test/mini.census.txt",sep="\t",skip=2,header=TRUE)
bwa = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/diff_aligners_test/bwa.census.txt",sep="\t",skip=2,header=TRUE)
STAR = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/diff_aligners_test/STAR.census.txt",sep="\t",skip=2,header=TRUE)

bwa$STAR = STAR$REPRESENTATION
bwa$minimap = mini$REPRESENTATION
bwa$bwa = bwa$REPRESENTATION
bwa$bowtie2 = bowtie$REPRESENTATION

bwa$STAR = (abs(bwa$STAR - bwa$KNOWN) / bwa$KNOWN) * 100
bwa$minimap = (abs(bwa$mini - bwa$KNOWN) / bwa$KNOWN) * 100
bwa$bwa = (abs(bwa$bwa - bwa$KNOWN) / bwa$KNOWN) * 100
bwa$bowtie2 = (abs(bwa$bowtie2 - bwa$KNOWN) / bwa$KNOWN) * 100

meltedviolin = subset(bwa, select = c(DONOR,bwa,bowtie2,minimap,STAR))
meltedviolin = melt(meltedviolin,id="DONOR")
colnames(meltedviolin) <- c('Donor','Aligner','PercentError')

ggplot(meltedviolin, aes(x=Aligner,y=PercentError,fill=Aligner)) + 
  geom_violin() +
  scale_fill_brewer(palette="Dark2") + 
  geom_dotplot(binaxis='y',stackdir = 'center',dotsize=1,color="black") +
  labs(title="Aligner Type versus Percent Error",x="Aligner", y = "Percent Error") +
  theme_minimal() 

Code
bowtie = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/diff_aligners_test_48/bowtie.census.txt",sep="\t",skip=2,header=TRUE)
mini = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/diff_aligners_test_48/mini.census.txt",sep="\t",skip=2,header=TRUE)
bwa = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/diff_aligners_test_48/bwa.census.txt",sep="\t",skip=2,header=TRUE)
STAR = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/diff_aligners_test_48/STAR.census.txt",sep="\t",skip=2,header=TRUE)

bwa$STAR = STAR$REPRESENTATION
bwa$minimap = mini$REPRESENTATION
bwa$bwa = bwa$REPRESENTATION
bwa$bowtie2 = bowtie$REPRESENTATION

bwa$STAR = ((bwa$STAR - bwa$KNOWN) / bwa$KNOWN) * 100
bwa$minimap = ((bwa$mini - bwa$KNOWN) / bwa$KNOWN) * 100
bwa$bwa = ((bwa$bwa - bwa$KNOWN) / bwa$KNOWN) * 100
bwa$bowtie2 = ((bwa$bowtie2 - bwa$KNOWN) / bwa$KNOWN) * 100

meltedviolin = subset(bwa, select = c(DONOR,bwa,minimap,bowtie2,STAR))
meltedviolin = melt(meltedviolin,id="DONOR")
colnames(meltedviolin) <- c('Donor','Aligner','PercentError')

ggplot(meltedviolin, aes(x=Aligner,y=PercentError,fill=Aligner)) + 
  geom_abline(slope=0, intercept=0,colour='#000000') +
  geom_violin() +
  scale_fill_brewer(palette="Dark2") + 
  geom_dotplot(binaxis='y',stackdir = 'center',dotsize=1,color="black") +
  labs(title="Aligner Type versus Percent Error",x="Aligner", y = "Percent Error") +
  theme_minimal() +
  theme(legend.position="none")

Code
ggsave("images/diff_aligners.png")

Aligner Heatmap

Code
bowtie = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/diff_aligners_test/bowtie.census.txt",sep="\t",skip=2,header=TRUE)
mini = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/diff_aligners_test/mini.census.txt",sep="\t",skip=2,header=TRUE)
bwa = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/diff_aligners_test/bwa.census.txt",sep="\t",skip=2,header=TRUE)
STAR = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/diff_aligners_test/STAR.census.txt",sep="\t",skip=2,header=TRUE)

bwa$STAR = STAR$REPRESENTATION
bwa$mini = mini$REPRESENTATION
bwa$bwa = bwa$REPRESENTATION
bwa$bowtie = bowtie$REPRESENTATION

bwa$STAR = ((bwa$STAR - bwa$KNOWN) / bwa$KNOWN) * 100
bwa$mini = ((bwa$mini - bwa$KNOWN) / bwa$KNOWN) * 100
bwa$bwa = ((bwa$bwa - bwa$KNOWN) / bwa$KNOWN) * 100
bwa$bowtie = ((bwa$bowtie - bwa$KNOWN) / bwa$KNOWN) * 100

bwa$DONOR = sub("plexWell-", "", bwa$DONOR)
bwa$DONOR = sub("_GT23-023.._.*", "", bwa$DONOR)

meltedviolin = subset(bwa, select = c(DONOR,bwa,bowtie,mini,STAR))

df = as.data.frame(meltedviolin)
rownames(df) = df$DONOR
df$DONOR = NULL

pheatmap(df)

Code
bowtie = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/diff_aligners_test_48/bowtie.census.txt",sep="\t",skip=2,header=TRUE)
mini = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/diff_aligners_test_48/mini.census.txt",sep="\t",skip=2,header=TRUE)
bwa = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/diff_aligners_test_48/bwa.census.txt",sep="\t",skip=2,header=TRUE)
STAR = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/diff_aligners_test_48/STAR.census.txt",sep="\t",skip=2,header=TRUE)

bwa$STAR = STAR$REPRESENTATION
bwa$mini = mini$REPRESENTATION
bwa$bwa = bwa$REPRESENTATION
bwa$bowtie = bowtie$REPRESENTATION

bwa$STAR = ((bwa$STAR - bwa$KNOWN) / bwa$KNOWN) * 100
bwa$mini = ((bwa$mini - bwa$KNOWN) / bwa$KNOWN) * 100
bwa$bwa = ((bwa$bwa - bwa$KNOWN) / bwa$KNOWN) * 100
bwa$bowtie = ((bwa$bowtie - bwa$KNOWN) / bwa$KNOWN) * 100

bwa$DONOR = sub("plexWell-", "", bwa$DONOR)
bwa$DONOR = sub("_GT23-023.._.*", "", bwa$DONOR)

meltedviolin = subset(bwa, select = c(DONOR,bwa,bowtie,mini,STAR))

df = as.data.frame(meltedviolin)
rownames(df) = df$DONOR
df$DONOR = NULL

p1 <- pheatmap(df,show_colnames = FALSE,fontsize_row = 6)

Code
save_pheatmap_pdf <- function(x, filename, width=10.5, height=6.5) {
   stopifnot(!missing(x))
   stopifnot(!missing(filename))
   pdf(filename, width=width, height=height)
   grid::grid.newpage()
   grid::grid.draw(x$gtable)
   dev.off()
}
save_pheatmap_pdf(p1, "images/pheatmap2.pdf")
png 
  2 
Code
bowtie = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/diff_aligners_test/bowtie.census.txt",sep="\t",skip=2,header=TRUE)
mini = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/diff_aligners_test/mini.census.txt",sep="\t",skip=2,header=TRUE)
bwa = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/diff_aligners_test/bwa.census.txt",sep="\t",skip=2,header=TRUE)
STAR = read.csv("/projects/munger-lab/projects/census_seq_SSP2024/diff_aligners_test/STAR.census.txt",sep="\t",skip=2,header=TRUE)

bwa$STAR = STAR$REPRESENTATION - STAR$KNOWN
bwa$mini = mini$REPRESENTATION - mini$KNOWN
bwa$bwa = bwa$REPRESENTATION - bwa$KNOWN
bwa$bowtie = bowtie$REPRESENTATION - bowtie$KNOWN

bwa$DONOR = sub("plexWell-", "", bwa$DONOR)
bwa$DONOR = sub("_GT23-023.._.*", "", bwa$DONOR)

meltedviolin = subset(bwa, select = c(DONOR,bwa,bowtie,mini,STAR))

df = as.data.frame(meltedviolin)
rownames(df) = df$DONOR
df$DONOR = NULL

pheatmap(df)

One Million Reads In Silico versus Inferred

Code
files <- list.files(path = "/projects/munger-lab/projects/census_seq_SSP2024/replication_runs/tiny_prop_one", pattern = "my.census.txt", recursive = TRUE, full.names = TRUE)
dirs <- dirname(files)

merged_dfs <- list()

for (dir in dirs) {
  # Construct the file paths
  file_path_txt <- file.path(dir, "my.census.txt")
  file_path_tsv <- file.path(dir, "census_report.tsv")
  df_txt <- read_tsv(file_path_txt, skip = 2)
  df_tsv <- read_tsv(file_path_tsv, col_names=FALSE)
  merged_df <- merge(df_txt, df_tsv, by.x = "DONOR", by.y = "X1", all.x = TRUE)
  merged_df$X2 = merged_df$X2 / 1000000
  
  merged_dfs[[length(merged_dfs) + 1]] <- merged_df
}

final_df <- do.call(rbind, merged_dfs)
final_df <- final_df[-which(final_df$X2 > 0.1),]


p1 <- ggplot(final_df, aes(x = X2, y = REPRESENTATION)) + 
  geom_point(shape=1) + 
  geom_abline(slope=1, intercept=0,colour='#E41A1C') +
  scale_x_continuous(name="Donor Fraction of village (in silico mixture)", limits=c(0, 0.016)) +
  scale_y_continuous(name="Donor Fraction of village (inferred by Census-seq)", limits=c(0, 0.016)) +
  theme_minimal()


final_df$Percent_Error = abs(final_df$REPRESENTATION - final_df$X2) / final_df$X2 * 100

final_df_one = final_df %>%
  group_by(X2) %>%
  mutate(Median_Percent_Error = median(Percent_Error))

p2 <- ggplot(final_df_one, aes(x = X2, y = Median_Percent_Error)) + 
  geom_point() + 
  geom_line() + 
  scale_x_continuous(name="Donor Fraction of village (in silico mixture)", limits=c(0, 0.0105)) +
  scale_y_continuous(name="Median Percent Error", limits=c(0, 100)) +
  theme_minimal()

p1 + p2

One Million Reads Passed Reads versus Inferred

Code
files <- list.files(path = "/projects/munger-lab/projects/census_seq_SSP2024/replication_runs/tiny_prop_one", pattern = "my.census.txt", recursive = TRUE, full.names = TRUE)
dirs <- dirname(files)

merged_dfs <- list()

for (dir in dirs) {
  # Construct the file paths
  file_path_txt <- file.path(dir, "my.census.txt")
  file_path_tsv <- file.path(dir, "census_report.tsv")
  df_txt <- read_tsv(file_path_txt, skip = 2)
  df_tsv <- read_tsv(file_path_tsv, col_names=FALSE)
  merged_df <- merge(df_txt, df_tsv, by.x = "DONOR", by.y = "X1", all.x = TRUE)
  merged_df$X2 = merged_df$X2 / 1000000
  
  merged_dfs[[length(merged_dfs) + 1]] <- merged_df
}

final_df <- do.call(rbind, merged_dfs)
final_df <- final_df[-which(final_df$X2 > 0.1),]


p1 <- ggplot(final_df, aes(x = KNOWN, y = REPRESENTATION)) + 
  geom_point(shape=1) + 
  geom_abline(slope=1, intercept=0,colour='#E41A1C') +
  scale_x_continuous(name="Donor Fraction of village (passed reads)", limits=c(0, 0.016)) +
  scale_y_continuous(name="Donor Fraction of village (inferred by Census-seq)", limits=c(0, 0.016)) +
  theme_minimal()

final_df$Percent_Error = abs(final_df$REPRESENTATION - final_df$KNOWN) / final_df$KNOWN * 100

final_df = final_df %>%
  group_by(KNOWN) %>%
  mutate(Median_Percent_Error = median(Percent_Error))

p2 <- ggplot(final_df, aes(x = KNOWN, y = Median_Percent_Error)) + 
  geom_point() + 
  geom_line() + 
  scale_x_continuous(name="Donor Fraction of village (passed reads)", limits=c(0, 0.0105)) +
  scale_y_continuous(name="Median Percent Error", limits=c(0, 100)) +
  theme_minimal()

p1 + p2

Ten Million Reads In Silico versus Inferred

Code
files <- list.files(path = "/projects/munger-lab/projects/census_seq_SSP2024/replication_runs/tiny_prop_ten", pattern = "my.census.txt", recursive = TRUE, full.names = TRUE)
dirs <- dirname(files)

merged_dfs <- list()

for (dir in dirs) {
  # Construct the file paths
  file_path_txt <- file.path(dir, "my.census.txt")
  file_path_tsv <- file.path(dir, "census_report.tsv")
  df_txt <- read_tsv(file_path_txt, skip = 2)
  df_tsv <- read_tsv(file_path_tsv, col_names=FALSE)
  merged_df <- merge(df_txt, df_tsv, by.x = "DONOR", by.y = "X1", all.x = TRUE)
  merged_df$X2 = merged_df$X2 / 10000000
  
  merged_dfs[[length(merged_dfs) + 1]] <- merged_df
}

final_df <- do.call(rbind, merged_dfs)
final_df <- final_df[-which(final_df$X2 > 0.1),]


p1 <- ggplot(final_df, aes(x = X2, y = REPRESENTATION)) + 
  geom_point(shape=1) + 
  geom_abline(slope=1, intercept=0,colour='#E41A1C') +
  scale_x_continuous(name="Donor Fraction of village (in silico mixture)", limits=c(0, 0.016)) +
  scale_y_continuous(name="Donor Fraction of village (inferred by Census-seq)", limits=c(0, 0.016)) +
  theme_minimal()


final_df$Percent_Error = abs(final_df$REPRESENTATION - final_df$X2) / final_df$X2 * 100

final_df_ten = final_df %>%
  group_by(X2) %>%
  mutate(Median_Percent_Error = median(Percent_Error))

p2 <- ggplot(final_df_ten, aes(x = X2, y = Median_Percent_Error)) + 
  geom_point() + 
  geom_line() + 
  scale_x_continuous(name="Donor Fraction of village (in silico mixture)", limits=c(0, 0.0105)) +
  scale_y_continuous(name="Median Percent Error", limits=c(0, 100)) +
  theme_minimal()

p1 + p2

Ten Million Reads Passed Reads versus Inferred

Code
files <- list.files(path = "/projects/munger-lab/projects/census_seq_SSP2024/replication_runs/tiny_prop_ten", pattern = "my.census.txt", recursive = TRUE, full.names = TRUE)
dirs <- dirname(files)

merged_dfs <- list()

for (dir in dirs) {
  # Construct the file paths
  file_path_txt <- file.path(dir, "my.census.txt")
  file_path_tsv <- file.path(dir, "census_report.tsv")
  df_txt <- read_tsv(file_path_txt, skip = 2)
  df_tsv <- read_tsv(file_path_tsv, col_names=FALSE)
  merged_df <- merge(df_txt, df_tsv, by.x = "DONOR", by.y = "X1", all.x = TRUE)
  merged_df$X2 = merged_df$X2 / 10000000
  
  merged_dfs[[length(merged_dfs) + 1]] <- merged_df
}

final_df <- do.call(rbind, merged_dfs)
final_df <- final_df[-which(final_df$X2 > 0.1),]


p1 <- ggplot(final_df, aes(x = KNOWN, y = REPRESENTATION)) + 
  geom_point(shape=1) + 
  geom_abline(slope=1, intercept=0,colour='#E41A1C') +
  scale_x_continuous(name="Donor Fraction of village (passed reads)", limits=c(0, 0.016)) +
  scale_y_continuous(name="Donor Fraction of village (inferred by Census-seq)", limits=c(0, 0.016)) +
  theme_minimal()

final_df$Percent_Error = abs(final_df$REPRESENTATION - final_df$KNOWN) / final_df$KNOWN * 100

final_df = final_df %>%
  group_by(KNOWN) %>%
  mutate(Median_Percent_Error = median(Percent_Error))

p2 <- ggplot(final_df, aes(x = KNOWN, y = Median_Percent_Error)) + 
  geom_point() + 
  geom_line() + 
  scale_x_continuous(name="Donor Fraction of village (passed reads)", limits=c(0, 0.0105)) +
  scale_y_continuous(name="Median Percent Error", limits=c(0, 100)) +
  theme_minimal()

p1 + p2

One Hundred Million Reads In Silico versus Inferred

Code
files <- list.files(path = "/projects/munger-lab/projects/census_seq_SSP2024/replication_runs/tiny_prop_hundred", pattern = "my.census.txt", recursive = TRUE, full.names = TRUE)
dirs <- dirname(files)

merged_dfs <- list()

for (dir in dirs) {
  # Construct the file paths
  file_path_txt <- file.path(dir, "my.census.txt")
  file_path_tsv <- file.path(dir, "census_report.tsv")
  df_txt <- read_tsv(file_path_txt, skip = 2)
  df_tsv <- read_tsv(file_path_tsv, col_names=FALSE)
  merged_df <- merge(df_txt, df_tsv, by.x = "DONOR", by.y = "X1", all.x = TRUE)
  merged_df$X2 = merged_df$X2 / 100000000
  
  merged_dfs[[length(merged_dfs) + 1]] <- merged_df
}

final_df <- do.call(rbind, merged_dfs)
final_df <- final_df[-which(final_df$X2 > 0.1),]


p1 <- ggplot(final_df, aes(x = X2, y = REPRESENTATION)) + 
  geom_point(shape=1) + 
  geom_abline(slope=1, intercept=0,colour='#E41A1C') +
  scale_x_continuous(name="Donor Fraction of village (in silico mixture)", limits=c(0, 0.016)) +
  scale_y_continuous(name="Donor Fraction of village (inferred by Census-seq)", limits=c(0, 0.016)) +
  theme_minimal()


final_df$Percent_Error = abs(final_df$REPRESENTATION - final_df$X2) / final_df$X2 * 100

final_df_hundred = final_df %>%
  group_by(X2) %>%
  mutate(Median_Percent_Error = median(Percent_Error))

p2 <- ggplot(final_df_hundred, aes(x = X2, y = Median_Percent_Error)) + 
  geom_point() + 
  geom_line() + 
  scale_x_continuous(name="Donor Fraction of village (in silico mixture)", limits=c(0, 0.0105)) +
  scale_y_continuous(name="Median Percent Error", limits=c(0, 100)) +
  theme_minimal()

p1 + p2

One Hundred Million Reads Passed Reads versus Inferred

Code
files <- list.files(path = "/flashscratch/munger-rea/tiny_prop_hundred", pattern = "my.census.txt", recursive = TRUE, full.names = TRUE)
dirs <- dirname(files)

merged_dfs <- list()

for (dir in dirs) {
  # Construct the file paths
  file_path_txt <- file.path(dir, "my.census.txt")
  file_path_tsv <- file.path(dir, "census_report.tsv")
  df_txt <- read_tsv(file_path_txt, skip = 2)
  df_tsv <- read_tsv(file_path_tsv, col_names=FALSE)
  merged_df <- merge(df_txt, df_tsv, by.x = "DONOR", by.y = "X1", all.x = TRUE)
  merged_df$X2 = merged_df$X2 / 100000000
  
  merged_dfs[[length(merged_dfs) + 1]] <- merged_df
}

final_df <- do.call(rbind, merged_dfs)
final_df <- final_df[-which(final_df$X2 > 0.1),]


p1 <- ggplot(final_df, aes(x = KNOWN, y = REPRESENTATION)) + 
  geom_point(shape=1) + 
  geom_abline(slope=1, intercept=0,colour='#E41A1C') +
  scale_x_continuous(name="Donor Fraction of village (passed reads)", limits=c(0, 0.016)) +
  scale_y_continuous(name="Donor Fraction of village (inferred by Census-seq)", limits=c(0, 0.016)) +
  theme_minimal()

final_df$Percent_Error = abs(final_df$REPRESENTATION - final_df$KNOWN) / final_df$KNOWN * 100

final_df = final_df %>%
  group_by(KNOWN) %>%
  mutate(Median_Percent_Error = median(Percent_Error))

p2 <- ggplot(final_df, aes(x = KNOWN, y = Median_Percent_Error)) + 
  geom_point() + 
  geom_line() + 
  scale_x_continuous(name="Donor Fraction of village (passed reads)", limits=c(0, 0.0105)) +
  scale_y_continuous(name="Median Percent Error", limits=c(0, 100)) +
  theme_minimal()

p1 + p2

Code
files <- list.files(path = "/projects/munger-lab/projects/census_seq_SSP2024/replication_runs/tiny_prop_hundred", pattern = "my.census.txt", recursive = TRUE, full.names = TRUE)
dirs <- dirname(files)

merged_dfs <- list()

for (dir in dirs) {
  # Construct the file paths
  file_path_txt <- file.path(dir, "my.census.txt")
  file_path_tsv <- file.path(dir, "census_report.tsv")
  df_txt <- read_tsv(file_path_txt, skip = 2)
  df_tsv <- read_tsv(file_path_tsv, col_names=FALSE)
  merged_df <- merge(df_txt, df_tsv, by.x = "DONOR", by.y = "X1", all.x = TRUE)
  merged_df$X2 = merged_df$X2 / 100000000
  
  merged_dfs[[length(merged_dfs) + 1]] <- merged_df
}

final_df <- do.call(rbind, merged_dfs)
final_df <- final_df[-which(final_df$X2 > 0.1),]


p1 <- ggplot(final_df, aes(x = X2, y = REPRESENTATION)) + 
  geom_point(shape=1) + 
  geom_abline(slope=1, intercept=0,colour='#E41A1C') +
  scale_x_continuous(name="Donor Fraction of village (in silico mixture)", limits=c(0, 0.016)) +
  scale_y_continuous(name="Donor Fraction of village (inferred by Census-seq)", limits=c(0, 0.016)) +
  theme_minimal()


final_df$Percent_Error = abs(final_df$REPRESENTATION - final_df$X2) / final_df$X2 * 100

final_df_hundred = final_df %>%
  group_by(X2) %>%
  mutate(Median_Percent_Error = median(Percent_Error))

p2 <- ggplot(final_df_hundred, aes(x = X2, y = Median_Percent_Error)) + 
  geom_point() + 
  geom_line() + 
  scale_x_continuous(name="Donor Fraction of village (in silico mixture)", limits=c(0, 0.0105)) +
  scale_y_continuous(name="Median Percent Error", limits=c(0, 100)) +
  theme_minimal()

p1 + p2

Code
coverage_graphs = function(paths,coverage){

files <- list.files(path = paths, pattern = "my.census.txt", recursive = TRUE, full.names = TRUE)
dirs <- dirname(files)

merged_dfs <- list()

for (dir in dirs) {
  # Construct the file paths
  file_path_txt <- file.path(dir, "my.census.txt")
  file_path_tsv <- file.path(dir, "census_report_final.tsv")
  df_txt <- read_tsv(file_path_txt, skip = 2)
  df_tsv <- read_tsv(file_path_tsv, col_names=FALSE)
  merged_df <- merge(df_txt, df_tsv, by.x = "DONOR", by.y = "X1", all.x = TRUE)
  
  merged_dfs[[length(merged_dfs) + 1]] <- merged_df
}

final_df <- do.call(rbind, merged_dfs)
final_df <- final_df[-which(final_df$X3 > 0.0105),]


final_df$Percent_Error = abs(final_df$REPRESENTATION - final_df$X3) / final_df$X3 * 100

final_df_c = final_df %>%
  group_by(X3) %>%
  mutate(Median_Percent_Error = median(Percent_Error))

final_df_c$Coverage = coverage
final_df_c = subset(final_df_c, select = c(DONOR,X3,Median_Percent_Error,Coverage))

return(final_df_c)
}
Code
df_0.1 <- coverage_graphs("/projects/munger-lab/projects/census_seq_SSP2024/coverage_dir/0.1","0.1x")
df_0.2 <- coverage_graphs("/projects/munger-lab/projects/census_seq_SSP2024/coverage_dir/0.2","0.2x")
df_0.3 <- coverage_graphs("/projects/munger-lab/projects/census_seq_SSP2024/coverage_dir/0.3","0.3x")
df_0.5 <- coverage_graphs("/projects/munger-lab/projects/census_seq_SSP2024/coverage_dir/0.5","0.5x")
df_1 <- coverage_graphs("/projects/munger-lab/projects/census_seq_SSP2024/coverage_dir/1","1x")
df_2 <- coverage_graphs("/projects/munger-lab/projects/census_seq_SSP2024/coverage_dir/2","2x")
df_3 <- coverage_graphs("/projects/munger-lab/projects/census_seq_SSP2024/coverage_dir/3","3x")
df_5 <- coverage_graphs("/projects/munger-lab/projects/census_seq_SSP2024/coverage_dir/5","5x")
df_10 <- coverage_graphs("/projects/munger-lab/projects/census_seq_SSP2024/coverage_dir/10","10x")
df_20 <- coverage_graphs("/projects/munger-lab/projects/census_seq_SSP2024/coverage_dir/20","20x")
df_30 <- coverage_graphs("/projects/munger-lab/projects/census_seq_SSP2024/coverage_dir/30","30x")


mega_merge <- rbind(df_0.1,df_0.2,df_0.3,df_0.5,df_1,df_2,df_3,df_5,df_10,df_20,df_30)

coverage_levels <- c("0.1x", "0.2x", "0.3x","0.5x","1x","2x","3x","5x", "10x", "20x", "30x")
mega_merge$Coverage <- factor(mega_merge$Coverage, levels = coverage_levels)

p2 <- ggplot(mega_merge, aes(x = X3, y = Median_Percent_Error)) + 
  geom_line(aes(color = Coverage)) +
  scale_x_continuous(name="Donor Fraction of village (in silico mixture)", limits=c(0, 0.0105)) +
  scale_y_continuous(name="Median Percent Error") +
  scale_color_manual(values = c("0.1x" = "#ff0000", "0.2x" = "#ff8700", "0.3x" = "#ffd300", "0.5x" = "#deff0a", "1x" = "#a1ff0a", "2x" = "#0aff99", "3x" = "#0aefff",
                                "5x" = "#147df5", "10x" = "#580aff", "30x" = "#be0aff", "20x" = "mediumpurple1")) +
  geom_point() + 
  theme_minimal()

p2

Code
ggsave("images/coverage.png")
Code
files <- list.files(path = "/projects/munger-lab/projects/census_seq_SSP2024/coverage_dir/10", pattern = "my.census.txt", recursive = TRUE, full.names = TRUE)
dirs <- dirname(files)

merged_dfs <- list()

for (dir in dirs) {
  # Construct the file paths
  file_path_txt <- file.path(dir, "my.census.txt")
  file_path_tsv <- file.path(dir, "census_report_final.tsv")
  df_txt <- read_tsv(file_path_txt, skip = 2)
  df_tsv <- read_tsv(file_path_tsv, col_names=FALSE)
  merged_df <- merge(df_txt, df_tsv, by.x = "DONOR", by.y = "X1", all.x = TRUE)
  
  merged_dfs[[length(merged_dfs) + 1]] <- merged_df
}

final_df <- do.call(rbind, merged_dfs)
final_df <- final_df[-which(final_df$X3 > 0.0105),]


p1 <- ggplot(final_df, aes(x = X3, y = REPRESENTATION)) + 
  geom_point(shape=1) + 
  geom_abline(slope=1, intercept=0,colour='#E41A1C') +
  scale_x_continuous(name="Donor Fraction of village (in silico mixture)", limits=c(0, 0.016)) +
  scale_y_continuous(name="Donor Fraction of village (inferred by Census-seq)", limits=c(0, 0.016)) +
  theme_minimal()


final_df$Percent_Error = abs(final_df$REPRESENTATION - final_df$X3) / final_df$X3 * 100

final_df_30 = final_df %>%
  group_by(X3) %>%
  mutate(Median_Percent_Error = median(Percent_Error))

p2 <- ggplot(final_df_30, aes(x = X3, y = Median_Percent_Error)) + 
  geom_point() + 
  geom_line() + 
  scale_x_continuous(name="Donor Fraction of village (in silico mixture)", limits=c(0, 0.0105)) +
  scale_y_continuous(name="Median Percent Error", limits=c(0, 100)) +
  theme_minimal()

final_df_30$Coverage = "30x"
final_df_30 = subset(final_df_30, select = c(DONOR,X3,Median_Percent_Error,Coverage))

p1 + p2

Code
ggsave("images/ten_coverage.png")
Code
files <- list.files(path = "/projects/munger-lab/projects/census_seq_SSP2024/coverage_dir/0.1", pattern = "my.census.txt", recursive = TRUE, full.names = TRUE)
dirs <- dirname(files)

merged_dfs <- list()

for (dir in dirs) {
  # Construct the file paths
  file_path_txt <- file.path(dir, "my.census.txt")
  file_path_tsv <- file.path(dir, "census_report_final.tsv")
  df_txt <- read_tsv(file_path_txt, skip = 2)
  df_tsv <- read_tsv(file_path_tsv, col_names=FALSE)
  merged_df <- merge(df_txt, df_tsv, by.x = "DONOR", by.y = "X1", all.x = TRUE)
  
  merged_dfs[[length(merged_dfs) + 1]] <- merged_df
}

final_df <- do.call(rbind, merged_dfs)
final_df <- final_df[-which(final_df$X3 > 0.1),]


p1 <- ggplot(final_df, aes(x = X3, y = REPRESENTATION)) + 
  geom_point(shape=1) + 
  geom_abline(slope=1, intercept=0,colour='#E41A1C') +
  scale_x_continuous(name="Donor Fraction of village (in silico mixture)", limits=c(0, 0.016)) +
  scale_y_continuous(name="Donor Fraction of village (inferred by Census-seq)", limits=c(0, 0.016)) +
  theme_minimal()


final_df$Percent_Error = abs(final_df$REPRESENTATION - final_df$X3) / final_df$X3 * 100

final_df_0.1 = final_df %>%
  group_by(X3) %>%
  mutate(Median_Percent_Error = median(Percent_Error))

p2 <- ggplot(final_df_0.1, aes(x = X3, y = Median_Percent_Error)) + 
  geom_point() + 
  geom_line() + 
  scale_x_continuous(name="Donor Fraction of village (in silico mixture)", limits=c(0, 0.0105)) +
  scale_y_continuous(name="Median Percent Error", limits=c(0, 100)) +
  theme_minimal()

final_df_0.1$Coverage = "0.1x"
final_df_0.1 = subset(final_df_0.1, select = c(DONOR,X3,Median_Percent_Error,Coverage))

p1 + p2

Code
files <- list.files(path = "/projects/munger-lab/projects/census_seq_SSP2024/coverage_dir/0.2", pattern = "my.census.txt", recursive = TRUE, full.names = TRUE)
dirs <- dirname(files)

merged_dfs <- list()

for (dir in dirs) {
  # Construct the file paths
  file_path_txt <- file.path(dir, "my.census.txt")
  file_path_tsv <- file.path(dir, "census_report_final.tsv")
  df_txt <- read_tsv(file_path_txt, skip = 2)
  df_tsv <- read_tsv(file_path_tsv, col_names=FALSE)
  merged_df <- merge(df_txt, df_tsv, by.x = "DONOR", by.y = "X1", all.x = TRUE)
  
  merged_dfs[[length(merged_dfs) + 1]] <- merged_df
}

final_df <- do.call(rbind, merged_dfs)
final_df <- final_df[-which(final_df$X3 > 0.1),]


p1 <- ggplot(final_df, aes(x = X3, y = REPRESENTATION)) + 
  geom_point(shape=1) + 
  geom_abline(slope=1, intercept=0,colour='#E41A1C') +
  scale_x_continuous(name="Donor Fraction of village (in silico mixture)", limits=c(0, 0.016)) +
  scale_y_continuous(name="Donor Fraction of village (inferred by Census-seq)", limits=c(0, 0.016)) +
  theme_minimal()


final_df$Percent_Error = abs(final_df$REPRESENTATION - final_df$X3) / final_df$X3 * 100

final_df_0.2 = final_df %>%
  group_by(X3) %>%
  mutate(Median_Percent_Error = median(Percent_Error))

p2 <- ggplot(final_df_0.2, aes(x = X3, y = Median_Percent_Error)) + 
  geom_point() + 
  geom_line() + 
  scale_x_continuous(name="Donor Fraction of village (in silico mixture)", limits=c(0, 0.0105)) +
  scale_y_continuous(name="Median Percent Error", limits=c(0, 100)) +
  theme_minimal()

final_df_0.2$Coverage = "5x"
final_df_0.2 = subset(final_df_0.2, select = c(DONOR,X3,Median_Percent_Error,Coverage))

p1 + p2

Code
library(plotly)

dens <- with(diamonds, tapply(price, INDEX = cut, density))
data <- data.frame(
  x = unlist(lapply(dens, "[[", "x")),
  y = unlist(lapply(dens, "[[", "y")),
  cut = rep(names(dens), each = length(dens[[1]]$x)))

fig <- plot_ly(data, x = ~x, y = ~y, z = ~cut, type = 'scatter3d', mode = 'lines', color = ~cut)

fig
Code
final_df_ten$COVERAGE = "Ten"
final_df_one$COVERAGE = "One"
final_df_hundred$COVERAGE = "Hundred"

final_df_hundred = subset(final_df_hundred, select = c(DONOR,X2,Median_Percent_Error,COVERAGE))
final_df_ten = subset(final_df_ten, select = c(DONOR,X2,Median_Percent_Error,COVERAGE))
final_df_one = subset(final_df_one, select = c(DONOR,X2,Median_Percent_Error,COVERAGE))


massive_df = rbind(final_df_hundred,final_df_one,final_df_ten)

massive_df = as.data.frame(massive_df)

fig <- plot_ly(massive_df, x = ~X2, y = ~Median_Percent_Error, z = ~COVERAGE, type = 'scatter3d', mode = 'lines', color = ~COVERAGE)

fig
Code
library(plotrix)
data <- c(4, 4, 5, 6, 12, 18, 24)

pie3D(data,
      col = c("#921915", "#B8514B", "#DFA5A1", "#E1E9ED", "#BFD4EA", "#6292C7", "#174694"),
      shade = 0.5)